'%!in%' <- function(x,y)!('%in%'(x,y))

library(ggplot2)
library(stargazer)
library(patchwork)
# Load Data ---------------------------------------------------------------
load("lc_dat.rdata")
# Specify DVs ------------------------------------------------------------
# combining to DV list
DVS <- c("book_divchar", "book_divhist", "book_discrim",              
         "book_teachrace", "tellkid_equal", "tellkid_whiteworkharder",
         "tellkid_othersfacediscrim", "tellkid_blackrespectpolice", "tellkid_historicalfigs",
         "tellkid_whitediscrim", "tellkid_whiteprivilege", "buy_divchar",
         "movie_divchar", "changeschool", "attendblm",                 
         "blmsign", "communitymeeting", "antiracism_wkid",
         "antiracism_parenting")

# Specify RHS -------------------------------------------------------------
RHS <- c("age_lucid_sc", "woman", "edu_sc", "inc_sc",
         "zip_white_prop", "zip_black_prop", "zip_med_inc_sc", "zip_college_prop",
         "oldest_child_age_sc", "oldest_child_age_sc*woman", 
         "multkids", "multkids*woman", 
         "childcaretime_sc", "childcaretime_sc*woman", 
         "sahp", "sahp*woman", 
         "covidwork_hours", "covidwork_left",
         "pid7_sc", "ideo7_sc")


# Compare Distributions by Men and Women ------------------------------
struct_vars <- c("oldest_child_age_sc", "multkids", "childcaretime_sc", "sahp")

table(lc_dat$oldest_child_age_sc, lc_dat$woman)
chisq.test(table(lc_dat$oldest_child_age_sc, lc_dat$woman))

sd(lc_dat$oldest_child_age_sc[which(lc_dat$woman == 1)], na.rm = T)
sd(lc_dat$oldest_child_age_sc[which(lc_dat$woman == 0)], na.rm = T)
sd(lc_dat$childcaretime_sc[which(lc_dat$woman == 1)], na.rm = T)
sd(lc_dat$childcaretime_sc[which(lc_dat$woman == 0)], na.rm = T)


pdat <- lc_dat
pdat$woman <- factor(pdat$woman,
                     labels = c("Man", "Woman"))

age <- ggplot(subset(pdat, !is.na(woman)), 
       aes(x = oldest_child_age_sc, group = woman)) +
  geom_histogram(position = position_dodge(), aes(fill = woman)) +
  scale_fill_manual(values=c("black", "grey"), name = "") +
  theme_bw() +
  labs(x = "", y = "Count", title = "Oldest Child Age")
mkids <- ggplot(subset(pdat, !is.na(woman)), 
              aes(x = multkids, group = woman)) +
  geom_histogram(position = position_dodge(), aes(fill = woman)) +
  scale_fill_manual(values=c("black", "grey"), name = "") +
  theme_bw() +
  labs(x = "", y = "Count", title = "Mutiple Kids")
childcare <- ggplot(subset(pdat, !is.na(woman)), 
                aes(x = childcaretime_sc, group = woman)) +
  geom_histogram(position = position_dodge(), aes(fill = woman)) +
  scale_fill_manual(values=c("black", "grey"), name = "") +
  theme_bw() +
  labs(x = "", y = "Count", title = "Childcare Time")
sahp <- ggplot(subset(pdat, !is.na(woman)), 
                    aes(x = sahp, group = woman)) +
  geom_histogram(position = position_dodge(), aes(fill = woman)) +
  scale_fill_manual(values=c("black", "grey"), name = "") +
  theme_bw() +
  labs(x = "", y = "Count", title = "Stay at Home Parent")

(age + mkids) / (childcare + sahp) +
  plot_layout(guides = 'collect') & 
  theme(legend.position = "bottom")
# ggsave("demog_trends_prop.pdf", height = 10, width = 9)

# Model Loop --------------------------------------------------------------
# Formatting DF to store coefficients and p-values
coef_mat <- array(NA, c(length(DVS), length(RHS) + 1))
coef_mat <- as.data.frame(coef_mat)
names(coef_mat)[1] <- "var"
coef_mat$var <- DVS
sig_mat <- coef_mat

# empty list to store individual model results
model_results <- list()

for(i in 1:nrow(coef_mat)){
  rhs <- paste(RHS, collapse = "+")
  m <- lm(as.formula(paste(coef_mat$var[i], rhs, sep = "~")), 
          data = lc_dat)
  m_coef <- summary(m)$coefficients
  # store coefficients excluding intercept
  names(coef_mat)[-1] <- rownames(m_coef)[-1]
  coef_mat[i, -1] <- m_coef[-1, 1]
  # store p-values but for intercept
  names(sig_mat)[-1] <- rownames(m_coef)[-1]
  sig_mat[i, -1] <- m_coef[-1, 4]
  
  model_results[[i]] <- m
}


# summary call
lapply(model_results, summary)

# *Table -------------------------------------------------------------------
iv_labs <- c("Age",
             "Woman",
             "Education",
             "Income",
             "Zip: Prop White",
             "Zip: Prop Black",
             "Zip: Median Income",
             "Zip: Prop College Degree",
             "Oldest Child's Age",
             "Oldest Child's Age*Woman",
             "Multiple Kids",
             "Multiple Kids*Woman",
             "Childcare Time",
             "Childcare Time*Woman",
             "Stay at home Parent",
             "Stay at home Parent*Woman",
             "COVID Reduced Hours",
             "COVID Left Workforce",
             "Partisanship (Republican)",
             "Ideology (Conservative)")
dv_labs <- c("Anti-Racist Parenting Workshop",
             "Anti-Racist Workshop w/kid",
             "Attend BLM Protest",
             "Make BLM Sign",
             "Book on Discrimination",
             "Book w/ diverse characters",
             "Historical Books",
             "Book on Teaching about Race",
             "Toys w/diverse characters",
             "Change School",
             "Attend Community Meeting",
             "Movie w/diverse characters",
             "Tell Kids: Respect Police",
             "Tell Kids: People Equal",
             "Tell Kids: Historical Figures",
             "Tell Kids: Others Face Discrimination",
             "Tell Kids: We Face Discrimination",
             "Tell Kids: White Privilege",
             "Tell Kids: Whites work Harder")

# model results by DV order
stargazer(model_results[c(19,18,15,16,3,1)],
          title = "Correlates of Parenting Decisions",
          dep.var.caption = "",
          order = c(1:8, 9, 17, 10, 18, 11, 19, 12, 20, 13:16),
          # covariate.labels = iv_labs,
          column.labels = dv_labs[1:6],
          dep.var.labels.include = F,
          header = F, initial.zero = F,
          type = "text",
          label = "tab1_moderated", 
          # out = "table1_moderated.tex",
          digits = 3, omit.stat = c("f", "adj.rsq"),  df = F, no.space = T)

stargazer(model_results[c(2, 4, 12, 14, 17, 13, 8)],
          title = "Correlates of Parenting Decisions",
          dep.var.caption = "",
          order = c(1:8, 9, 17, 10, 18, 11, 19, 12, 20, 13:16),
          covariate.labels = iv_labs,
          column.labels = dv_labs[7:13],
          dep.var.labels.include = F,
          header = F, initial.zero = F,
          # type = "text", 
          label = "tab2_moderated", 
          out = "table2_moderated.tex",
          digits = 3, omit.stat = c("f", "adj.rsq"),  df = F, no.space = T)

stargazer(model_results[c(5, 9, 7, 10, 11, 6)],
          title = "Correlates of Parenting Decisions",
          dep.var.caption = "",
          order = c(1:8, 9, 17, 10, 18, 11, 19, 12, 20, 13:16),
          covariate.labels = iv_labs,
          column.labels = dv_labs[14:19],
          dep.var.labels.include = F,
          header = F, initial.zero = F,
          # type = "text", 
          label = "tab3_moderated", 
          out = "table3_moderated.tex",
          digits = 3, omit.stat = c("f", "adj.rsq"),  df = F, no.space = T)
